BuildNetwork

library(AnNet)
#> Loading required package: igraph
#> 
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#> 
#>     decompose, spectrum
#> The following object is masked from 'package:base':
#> 
#>     union
#> Loading required package: poweRlaw
#> Loading required package: org.Hs.eg.db
#> Loading required package: AnnotationDbi
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:igraph':
#> 
#>     normalize, path, union
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#>     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#>     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#>     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#>     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#>     union, unique, unsplit, which.max, which.min
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: IRanges
#> Loading required package: S4Vectors
#> Warning: package 'S4Vectors' was built under R version 4.1.3
#> 
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:base':
#> 
#>     expand.grid, I, unname
#> 
#> Loading required package: latex2exp
#> Loading required package: RSpectra
#> 
#> snapshotDate(): 2022-02-22
#> loading from cache
#> 
#> Attaching package: 'AnNet'
#> The following object is masked from 'package:igraph':
#> 
#>     permute
library(synaptome.db)
#> Loading required package: synaptome.data
#> Loading required package: AnnotationHub
#> Loading required package: BiocFileCache
#> Loading required package: dbplyr
#> 
#> Attaching package: 'AnnotationHub'
#> The following object is masked from 'package:Biobase':
#> 
#>     cache
library(ggplot2)
library(synaptome.db)
library(pander)
library(ggrepel)

Title AnNet: library for comprehensive network analysis

Authors: Colin Mclean, Anatoly Sorokin, J. Douglas Armstrong and Oksana Sorokina

#Introduction

Overview of capabilities

Build the network

The package allows building the network from the data frame, where the rows correspond to the edges of the graph, or for the list of nodes (genes), for which the information will be retrieved from the SynaptomeDB.

Build the network from data frame

The command builds the graph from provided data frame, simplifies the graph (removing multiple edges and loops) and finds the MCC (maximum connected component)


file <- system.file("extdata", "PPI_Presynaptic.csv", package = "AnNet")
tbl <- read.csv(file, sep="\t")
head(tbl)
#>   entA  entB We
#> 1  273  1759  1
#> 2  273  1785  1
#> 3  273 26052  1
#> 4  273   824  1
#> 5  273   161  1
#> 6  273  1020  1
gg <- buildNetwork(tbl)
summary(gg)
#> IGRAPH 906f9b2 UN-- 1780 6620 -- 
#> + attr: name (v/c)

Build network for the node list extracted from SynaptomeDB


cid<-match('Presynaptic',getCompartments()$Name) # Let's get the ID for presynaptic compartment
cid
#> [1] 2
t<-getAllGenes4Compartment(cid) # Now we need to collect all the gene IDs for presinaptic  compartment
dim(t)
#> [1] 2304    8
head(t)
#> # A tibble: 6 × 8
#>   GeneID MGI       HumanEntrez MouseEntrez RatEntrez HumanName MouseName RatName
#>    <int> <chr>           <int>       <int>     <int> <chr>     <chr>     <chr>  
#> 1      1 MGI:1277…        1742       13385     29495 DLG4      Dlg4      Dlg4   
#> 2      2 MGI:88256         815       12322     25400 CAMK2A    Camk2a    Camk2a 
#> 3      3 MGI:96568        9118      226180     24503 INA       Ina       Ina    
#> 4      4 MGI:98388        6711       20742    305614 SPTBN1    Sptbn1    Sptbn1 
#> 5      5 MGI:88257         816       12323     24245 CAMK2B    Camk2b    Camk2b 
#> 6      6 MGI:1344…        1740       23859     64053 DLG2      Dlg2      Dlg2
gg<-buildFromSynaptomeByEntrez(t$HumanEntrez) # finally, build the graph using respecctive gene EntrezIDs as node IDs
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c)

Use the predifined network


file <- system.file("extdata", "PPI_Presynaptic.gml", package = "AnNet")
gg1 <- igraph::read.graph(file,format="gml")
summary(gg1)
#> IGRAPH d04bca5 UN-- 2169 8673 -- 
#> + attr: id (v/n), name (v/c), GeneName (v/c), UniProt (v/c), louvain
#> | (v/n), TopOntoOVG (v/c), TopOntoOVGHDOID (v/c)

Annotate the nodes with node attributes

Gene name

Adding gene names to nodes.


gg<-annotateGeneNames(gg)
#> 'select()' returned 1:1 mapping between keys and columns
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c)
head(V(gg))
#> + 6/1780 vertices, named, from 8ddb28e:
#> [1] 273   6455  1759  1785  26052 2923
head(V(gg)$GeneName)
#> [1] "AMPH"   "SH3GL1" "DNM1"   "DNM2"   "DNM3"   "PDIA3"

Diseases

Adding diseases associations from HDO database

afile<-system.file("extdata", "flatfile_human_gene2HDO.csv", package = "AnNet")
dis    <- read.table(afile,sep="\t",skip=1,header=F,strip.white=T,quote="")
gg <- annotate_topOnto_ovg(gg, dis)
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c)

##Shanno Adding synaptic function annotated from еру Shanno et al.,paper

sfile<-system.file("extdata", "SCH_flatfile.csv", package = "AnNet")
shan   <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg<-annotate_SCHanno(gg,shan)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), Schanno (v/c)

Presynaptic functiion annotation

This is manually curated presynaptic genes specific functional annotation based on the paper of Boyken at al., 2019

{r annotate_Chua, eval=FALSE}
sfile<-system.file("extdata", "PresynAn.csv", package = "AnNet")
pres <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_CHUA(gg, pres)
summary(sgg)

GO

Adding functionality from GO: BP, MF,CC

sfile<-system.file("extdata", "flatfile.go.BP.csv", package = "AnNet")
goBP <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_go_bp(gg, goBP)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), GO_BP_ID (v/c), GO_BP (v/c)

sfile<-system.file("extdata", "flatfile.go.MF.csv", package = "AnNet")
goMF <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_go_mf(gg, goMF)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), GO_MF_ID (v/c), GO_MF (v/c)

sfile<-system.file("extdata", "flatfile.go.CC.csv", package = "AnNet")
goCC <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_go_cc(gg, goCC)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), GO_CC_ID (v/c), GO_CC (v/c)

#Esimate node centrality measures ## Estimate centrality measures with values added as node attributes. Centrality measures are as following:DEG - degree, BET - betweenness, CC - clustering coefficient, SL - semilocal centrality, mnSP - mean shortest path, PR - page rank, sdSP - standard deviation of the shortest path

gg <- calcCentrality(gg)
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), DEG (v/c), BET (v/c), CC (v/c), SL (v/c),
#> | mnSP (v/c), PR (v/c), sdSP (v/c)

##Get node centralities as a matrix. The same centrality measures as before are presented in a matrix form.

mc <- getCentralityMatrix(gg)
head(mc)
#>      ID      DEG  BET         CC      SL       mnSP    PR         sdSP   
#> [1,] "273"   "12" "833.233"   "0.106" "44656"  "3.424" "0.000786" "0.707"
#> [2,] "6455"  "22" "5444.71"   "0.082" "170211" "2.955" "0.001337" "0.683"
#> [3,] "1759"  "16" "1744.932"  "0.217" "150771" "3.062" "0.000962" "0.687"
#> [4,] "1785"  "27" "10994.509" "0.071" "218717" "2.89"  "0.001644" "0.701"
#> [5,] "26052" "6"  "291.138"   "0.133" "55060"  "3.503" "0.000405" "0.743"
#> [6,] "2923"  "7"  "1225.709"  "0.095" "55104"  "3.391" "0.000531" "0.758"

##Get the centrality measures for random graph Sometimes one needs to compare the graph properties of the given network with the ones of the randomized graph. Command below provides three ways of randomization, including G(n,p) Erdos-Renyi model , Barabasi-Albert model and new random graph from a given graph by randomly adding/removing edges.

{r}
ggrm <- getRandomGraphCentrality(sgg, type = c("cgnp"))
head(ggrm)

Power law fit

To examine the networks for underlying structure (i.e. not random), one can test network’s degree distribution for evidence of scale-free structure and compare it against the randomised network model. For this we used the R “PoweRlaw” package (version 0.50.0) (Gillespie, 2015). In the example below we found evidence for disassortative mixing (Newman, 2002), i.e. a preference for high-degree genes to attach to low-degree gene(presynaptic: -0.16).

pFit <- FitDegree( as.vector(igraph::degree(graph=sgg)),plot=TRUE,WIDTH=2480, HEIGHT=2480)

## Get entropy rate Evidence for scale-free structure can also be tested by performing a perturbation analysis on each network (Teschendorff et al, 2014). In this analysis each protein was perturbed through over-expression (red) and under-expression (green), with the global entropy rate (SR) after each proteins pertubation being plotted against the log of the proteins degree, as shown at the plot below. In the example network we observed a bi-modal response, between gene over-expression and degree, and opposing bi-phasic response relative to over/under-expression between global entropy rate and degree. This type of bi-modal, bi-phasic behaviour has been observed only in networks with scale-free or approximate scale-free topology (Teschendorff et al, 2014)[80].

ent <- getEntropyRate(gg)
ent
#> $maxSr
#> [1] 3.822764
#> 
#> $SRo
#> [1] 2.639026
SRprime <- getEntropy(gg, maxSr = NULL)
head(SRprime)
#>      ENTREZ.ID GENE.NAME DEGREE UP                  DOWN               
#> [1,] "273"     "AMPH"    "12"   "0.6877989113741"   "0.690315321671207"
#> [2,] "6455"    "SH3GL1"  "22"   "0.688466483598983" "0.689906842691128"
#> [3,] "1759"    "DNM1"    "16"   "0.689220589967989" "0.689957716069586"
#> [4,] "1785"    "DNM2"    "27"   "0.689632772969398" "0.689577514448685"
#> [5,] "26052"   "DNM3"    "6"    "0.689143168786511" "0.690296867048524"
#> [6,] "2923"    "PDIA3"   "7"    "0.688882651999579" "0.690313211229183"
plotEntropy(SRprime, subTIT = "Entropy", SRo = ent$SRo, maxSr = ent$maxSr)
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.

## Get modularity. Normalised modularity. Normalised modularity allows comparison the networks with different structure. Qm based on the previous studies by Parter et al., 2007, Takemoto, 2012,Takemoto, 2013, Takemoto and Borjigin, 2011, which was defined as: Qm = (Qreal-Qrand)/(Qmax-Qrand) Where Qreal is the network modularity of a real-world signaling network and,Qrand is the average network modularity value obtained from 10,000 randomized networks constructed from its real-world network. Qmax was estimated as: 1 − 1/M, where M is the number of modules in the real network. Randomized networks were generated from a real-world network using the edge-rewiring algorithm (Maslov and Sneppen, 2002).

#' cid<-match('Presynaptic',getCompartments()$Name)
#' t<-getAllGenes4Compartment(cid)
#' gg<-buildFromSynaptomeByEntrez(t$HumanEntrez)
nm<-normModularity(gg,alg='louvain')
nm
#> [1] 0.02605522

Clustering

Clustering, or community detection, in networks has been well studied in the field of statistical physics with particular attention to methods developed for social science networks. The underlying assumption(s) of what makes a community in social science, translates remarkably well to what we think of as a community (sub-complex, module or cluster) in PPI networks The possible algorithms of choice implemented in the package are: “lec”(‘Leading-Eigenvector, Newman, 2006), “wt”(Walktrap, Pons & Latapy, 2006), “fc”(Fast-Greedy Community’ algorithm, Clauset et al., 2004), “infomap” (InfoMAP, Rosvall et al., 2007; Rosvall et al., 2010), “louvain” (Louvain, Blondel et al., 2008), “sgG1”, “sgG2”, “sgG5”(SpinGlass, Reichardt & Bornholdt). For each algorithm of interest the membership could be obtained with calcMembership command. All algorithm implementations, apart from Spectral were performed using the publicly available R package igraph (Csardi & Nepusz, 2006) (R version 3.4.2, igraph version 1.1.2). Parameters used in the fc, lec, sg, wt and lourvain algorithms were chosen as to maximise the measure Modularity (Newman & Girvan, 2004); infomap seeks the optimal community structure in the data by maximising the objective function called the Minimum Description Length (Rissanen, 1978; Grwald et al., 2005)

calcAllClustering allows to calculate memberships for all clustering algorithms simultaneously (may take time), and store them on the graph vertices. In this case, calcMembership command will return the community membership for specific algorithm.

Alternatively, calcClustering allows to calculate clustering for selected algorithm.

#ggc <- calcAllClustering(gg)
#summary(ggc)

#mem <- calcMembership(ggc, alg = c("lec"))# choose one algorithm from the list
#head(mem)

alg = "louvain"
gg <- calcClustering(gg, alg)
gg
#> IGRAPH 8ddb28e UN-- 1780 6620 -- 
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), DEG (v/c), BET (v/c), CC (v/c), SL (v/c),
#> | mnSP (v/c), PR (v/c), sdSP (v/c), louvain (v/c)
#> + edges from 8ddb28e (vertex names):
#>  [1] 273 --1759   273 --1785   273 --26052  273 --824    273 --161   
#>  [6] 273 --1020   273 --5530   273 --274    273 --5532   273 --25977 
#> [11] 273 --8867   273 --9901   6455--1759   6455--1785   6455--26052 
#> [16] 6455--6456   6455--7534   6455--10015  6455--6457   6455--7431  
#> [21] 6455--392    6455--23176  6455--5689   6455--10570  6455--10059 
#> [26] 6455--6812   6455--3778   6455--57030  6455--114569 6455--192683
#> + ... omitted several edges

Consensus matrix

For assessing the robustness of the clustering randomization study could be performed, which applies the same clustering algorithm to N perturbed networks and returns the consensus matrix where each pair of nodes will be assigned the probability to belong to the same cluster.

conmat <- makeConsensusMatrix(gg, N=100,alg = alg, type = 2, mask = 10,reclust = FALSE, Cnmin = -1, Cnmax = 10)#Build consensus matrix for louvain clustering

##Consensus matrix value distribution Consensus matrix value distribution could be visualised in the following way:

steps <- 100
Fn  <- ecdf(conmat[lower.tri(conmat)])
X<-seq(0,1,length.out=steps+1)
cdf<-Fn(X)
dt<-data.frame(cons=X,cdf=cdf)
ggplot(dt,aes(x=cons,y=cdf))+geom_line()+
      theme(            
        axis.title.x=element_text(face="bold",size=rel(2.5)),
        axis.title.y=element_text(face="bold",size=rel(2.5)),
        legend.title=element_text(face="bold",size=rel(1.5)),
        legend.text=element_text(face="bold",size=rel(1.5)),
        legend.key=element_blank())+
    theme(panel.grid.major = element_line(colour="grey40",size=0.2),
          panel.grid.minor = element_line(colour="grey40",size=0.1),
          panel.background = element_rect(fill="white"),
          panel.border = element_rect(linetype="solid",fill=NA))

## Cluster robustness Cluster robustness assess the robustness of the obtained clusters and could help to evaluate the “goodness” of chosen clustering algorithm.

clrob<-getRobustness(gg, alg = alg, conmat)
pander(clrob)
alg C Cn Crob CrobScaled
louvain 1 163 0.09295 0.2793
louvain 2 207 0.08939 0.1456
louvain 3 224 0.08654 0.03879
louvain 4 194 0.08864 0.1175
louvain 5 65 0.09194 0.2415
louvain 6 151 0.09128 0.2164
louvain 7 117 0.08858 0.1151
louvain 8 99 0.09016 0.1746
louvain 9 89 0.0867 0.04472
louvain 10 124 0.08585 0.01266
louvain 11 116 0.08706 0.05805
louvain 12 47 0.0864 0.03345
louvain 13 58 0.09294 0.279
louvain 14 87 0.08551 0
louvain 15 39 0.1122 1

Bridgeness

Bridging proteins are known to interact with many neighbours simultaneously, organise function inside communities they belong to, but also affect/influence other communities in the network (Nepusz et al., 2008).Bridgeness is estimated from the consensus clustering estimated above and vertex degree to calculate the vertex’s community membership, i.e. the probability of the specific node to belong to every communities obtained by given clustering algorithm. The Bridgeness measure lies between 0, implying a vertex clearly belongs in a single community, and 1, implying a vertex forms a ‘global bridge’ across every community with the same strength.

br<-getBridgeness(gg,alg = alg,conmat)
#> calculating Bridgeness for:  louvain
pander(head(br))
ENTREZ.ID GENE.NAME BRIDGENESS.louvain
273 AMPH 0.0938476465095556
6455 SH3GL1 0.478160277229497
1759 DNM1 0.0184896145823757
1785 DNM2 0.427626561008655
26052 DNM3 0.150920248660597
2923 PDIA3 0.581464396485594

Bridgeness plot

Semi-local centrality measure (Chen et al., 2011) also lies between 0 and 1 indicating whether protein is important globally or locally. By plotting Bridgeness against semi-local centrality we can categorises the influence each protein found in our network has on the overall network structure: Region 1, proteins having a ‘global’ rather than ‘local’ influence in the network (also been called bottle-neck bridges, connector or kinless hubs (0<Sl<0.5; 0.5<Br<1). Region 2, proteins having ‘global’ and ‘local’ influence (0.5<Sl<1, 0.5<Br<1). Region 3, proteins centred within the community they belong to, but also communicating with a few other specific communities (0<Sl<0.5; 0.1<Br<0.5). Region 4, proteins with ‘local’ impact , primarily within one or two communities (local or party hubs, 0.5<Sl<1, 0<Br<0.5).

VIPs=c('8495','22999','8927','8573','26059','8497','27445','8499')
# VIPs=c('81876','10890','51552','5874','5862','11021','54734','5865','5864',
#            '9522','192683','10067','10396','9296','527','9114','537','535',
#            '528','51382','534','51606','523','80331','114569','127262','57084',
#            '57030','388662','6853','6854','8224','9900','9899','9145','9143',
#            '6855','132204','6857','127833','6861','529','526','140679','7781',
#            '81615','6844','6843')
indx   <- match(V(gg)$name,VIPs)
group <- ifelse( is.na(indx), 0,1)
MainDivSize <- 0.8
xmin        <- 0
xmax        <- 1
ymin        <- 0
ymax        <- 1
Xlab <- "Semilocal Centrality (SL)" 
Ylab <- "Bridgeness (B)"
X    <- as.numeric(igraph::get.vertex.attribute(gg,"SL",V(gg)))
X    <- scale(X)
Y       <- as.numeric(as.vector(br[,dim(br)[2]])) 
lbls <- ifelse(!is.na(indx),V(gg)$GeneName,"")
dt<-data.frame(X=X,Y=Y,vips=group,entres=V(gg)$name,name=V(gg)$GeneName)
dt_vips<-dt[dt$vips==1,]
dt_res<-dt[dt$vips==0,]
##--- baseColor of genes
baseColor="royalblue2"

##--- SPcolor, colour highlighting any 'specical' genes
SPColor="royalblue2"

##--- PSDColor, colour of core PSD95 interactor genes
PSDColor="magenta"

ggplot(dt,aes(x=X,y=Y,label=name))+#geom_point()+
    geom_point(data=dt_vips,
               aes(x=X,y=Y),colour=baseColor,size = 7,shape=15,show.legend=F)+
    geom_point(data=dt_res,
               aes(x=X,y=Y, alpha=(X*Y)), size = 3,shape=16,show.legend=F)+
    geom_label_repel(aes(label=as.vector(lbls)),
                     fontface='bold',color='black',fill='white',box.padding=0.1,
                     point.padding=NA,label.padding=0.15,segment.color='black',
                     force=1,size=rel(3.8),show.legend=F,max.overlaps=200)+
      labs(x=Xlab,y=Ylab,title=sprintf("%s",alg))+
    scale_x_continuous(expand = c(0, 0), limits = c(xmin, xmax)) + 
    scale_y_continuous(expand = c(0, 0), limits = c(ymin, ymax))+
    theme(            
        axis.title.x=element_text(face="bold",size=rel(2.5)),
        axis.title.y=element_text(face="bold",size=rel(2.5)),
        legend.title=element_text(face="bold",size=rel(1.5)),
        legend.text=element_text(face="bold",size=rel(1.5)),
        legend.key=element_blank())+
    theme(panel.grid.major = element_line(colour="grey40",size=0.2),
          panel.grid.minor = element_line(colour="grey40",size=0.1),
          panel.background = element_rect(fill="white"),
          panel.border = element_rect(linetype="solid",fill=NA))+
        geom_vline(xintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)+
    geom_hline(yintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)
#> Warning: Removed 6 rows containing missing values (geom_point).
#> Warning: Removed 1384 rows containing missing values (geom_point).
#> Warning: Removed 1390 rows containing missing values (geom_label_repel).

##Interactive view of bridgeness plot

library(plotly)
#> 
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#> 
#>     last_plot
#> The following object is masked from 'package:latex2exp':
#> 
#>     TeX
#> The following object is masked from 'package:AnnotationDbi':
#> 
#>     select
#> The following object is masked from 'package:IRanges':
#> 
#>     slice
#> The following object is masked from 'package:S4Vectors':
#> 
#>     rename
#> The following object is masked from 'package:igraph':
#> 
#>     groups
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following object is masked from 'package:graphics':
#> 
#>     layout
p<-ggplot(dt,aes(x=X,y=Y,label=name))+#geom_point()+
    geom_point(data=dt_vips,
               aes(x=X,y=Y),colour=baseColor,shape=15,show.legend=F)+
    geom_point(data=dt_res,
               aes(x=X,y=Y, alpha=(X*Y)),shape=16,show.legend=F)+
      labs(x=Xlab,y=Ylab,title=sprintf("%s",alg))+
    scale_x_continuous(expand = c(0, 0), limits = c(xmin, xmax)) + 
    scale_y_continuous(expand = c(0, 0), limits = c(ymin, ymax))+
    theme(            
        axis.title.x=element_text(face="bold",size=rel(2.5)),
        axis.title.y=element_text(face="bold",size=rel(2.5)),
        legend.title=element_text(face="bold",size=rel(1.5)),
        legend.text=element_text(face="bold",size=rel(1.5)),
        legend.key=element_blank())+
    theme(panel.grid.major = element_line(colour="grey40",size=0.2),
          panel.grid.minor = element_line(colour="grey40",size=0.1),
          panel.background = element_rect(fill="white"),
          panel.border = element_rect(linetype="solid",fill=NA))+
        geom_vline(xintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)+
    geom_hline(yintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)
ggplotly(p)
#> Warning: `gather_()` was deprecated in tidyr 1.2.0.
#> Please use `gather()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

Disease/annotation pairs

Given that Disease associated genes are connected within the graph, the common question is to check whether the networks for two different diseases are overlapping, which may indicate the common molecular mechanisms. Same is valid for any pair of annotations, e.g. one would ask if two different biological functions are related. To address the question, we have utilised the algorithm from Menche et al, which estimates the minimal shortest paths between two distinct annotations and compare it to the randomly annotated graph. Below example shows the estimation of disease separation for the following diseases: DOID:10652 (Alzheimer’s_disease), (bipolar_disorder), DOID:12849 (autistic_disorder), DOID:1826 (epilepsy) Command calcDiseasePairs quickly estimates the two annotation separation on the graph and compares it with one randomly reannotated graph. This could be used for initial guess of the relationships between the annotations. To assess the significance of the obtained separation values the command runPermDisease should be used, where the user can dfine the number of permutations. The command execution will take a while, depending on this number and return the table with p-value, p.adjusted, q-value and Bonferroni test.

p <- calcDiseasePairs(
    gg,
    name = "TopOnto_OVG_HDO_ID",
    diseases = c("DOID:10652","DOID:3312","DOID:12849"),
    permute = "r"
)
pander(p$disease_separation)
  DOID:10652 DOID:3312 DOID:12849
DOID:10652 0 -0.1262 0.009981
DOID:3312 NA 0 -0.1249
DOID:12849 NA NA 0

r <- runPermDisease(
    gg,
    name = "TopOnto_OVG_HDO_ID",
    diseases = c("DOID:10652","DOID:3312","DOID:12849","DOID:1826"),
    Nperm = 100,
    alpha = c(0.05, 0.01, 0.001)
)

pander(r$Disease_overlap_sig)
Table continues below
HDO.ID N HDO.ID N sAB Separated Overlap
DOID:10652 259 DOID:10652 259 0 . .
DOID:10652 259 DOID:3312 127 -0.249547559530832 . YES
DOID:10652 259 DOID:12849 95 -0.102817396216147 . YES
DOID:10652 259 DOID:1826 138 -0.284488456101642 . YES
DOID:3312 127 DOID:3312 127 0 . .
DOID:3312 127 DOID:12849 95 -0.366460202432022 . YES
DOID:3312 127 DOID:1826 138 -0.257697996938242 . YES
DOID:12849 95 DOID:12849 95 0 . .
DOID:12849 95 DOID:1826 138 -0.318736802820636 . YES
DOID:1826 138 DOID:1826 138 0 . .
Table continues below
zScore pvalue Separation/Overlap.than.chance
0 1 larger
-2.00776703602753 0.0446680532767752 Smaller
-0.865094412556766 0.386986971211123 Smaller
-3.14633064385253 0.00165332980625333 Smaller
0 1 larger
-3.7357241762782 0.000187175745854292 Smaller
-2.13281673628389 0.0329397627784495 Smaller
0 1 larger
-3.51054019849682 0.000447197187033641 Smaller
0 1 larger
Bonferroni p.adjusted q-value
. 1 1
. 0.261662620028474 0.0893361065535504
. 1 0.644978285351872
* 0.0161418350528516 0.00551109935417777
. 1 1
** 0.00548231817520051 0.00187175745854292
. 0.241198798678309 0.0823494069461237
. 1 1
** 0.00654913182042719 0.0022359859351682
. 1 1